Preparation

Start R in the bash terminal and run the following lines to install the libraries.

    install.packages("e1071")
    install.packages("caret")
    install.packages("rworldmap")
    install.packages("maptools")
    install.packages("rgeos")
    install.packages("reshape")
    install.packages("randomForest")
library(ggplot2)
library(rgdal)
## Loading required package: sp
## Please note that rgdal will be retired by the end of 2023,
## plan transition to sf/stars/terra functions using GDAL and PROJ
## at your earliest convenience.
## 
## rgdal: version: 1.5-25, (SVN revision 1143)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 3.2.1, released 2020/12/29
## Path to GDAL shared files: /Library/Frameworks/R.framework/Versions/4.1/Resources/library/rgdal/gdal
## GDAL binary built with GEOS: TRUE 
## Loaded PROJ runtime: Rel. 7.2.1, January 1st, 2021, [PJ_VERSION: 721]
## Path to PROJ shared files: /Library/Frameworks/R.framework/Versions/4.1/Resources/library/rgdal/proj
## PROJ CDN enabled: FALSE
## Linking to sp version:1.4-5
## To mute warnings of possible GDAL/OSR exportToProj4() degradation,
## use options("rgdal_show_exportToProj4_warnings"="none") before loading sp or rgdal.
## Overwritten PROJ_LIB was /Library/Frameworks/R.framework/Versions/4.1/Resources/library/rgdal/proj
library(raster)
library(rgeos)
## rgeos version: 0.5-7, (SVN revision 676)
##  GEOS runtime version: 3.8.1-CAPI-1.13.3 
##  Please note that rgeos will be retired by the end of 2023,
## plan transition to sf functions using GEOS at your earliest convenience.
##  Linking to sp version: 1.4-5 
##  Polygon checking: TRUE
library(reshape)
library(rasterVis)
## Loading required package: lattice
library(dismo)
library(InformationValue)
library(mgcv)
## Loading required package: nlme
## 
## Attaching package: 'nlme'
## The following object is masked from 'package:raster':
## 
##     getData
## This is mgcv 1.8-36. For overview type 'help("mgcv-package")'.
library(randomForest)
## randomForest 4.7-1.1
## Type rfNews() to see new features/changes/bug fixes.
## 
## Attaching package: 'randomForest'
## The following object is masked from 'package:ggplot2':
## 
##     margin
library(e1071)
## 
## Attaching package: 'e1071'
## The following object is masked from 'package:raster':
## 
##     interpolate
library(caret)
## 
## Attaching package: 'caret'
## The following objects are masked from 'package:InformationValue':
## 
##     confusionMatrix, precision, recall, sensitivity, specificity
set.seed(30)

Data Exploration

We will use Montane woodcreper (Lepidocolaptes lacrymiger) as example species.

This species has a large range, occurring from the coastal cordillera of Venezuela along the Andes south to south-east Peru and central Bolivia.

Read in points data

Load observation presence dataset

Let’s suppose that you have done field work and you have collected the bird presence in the Lepidocolaptes_lacrymiger_allpoints.csv file

 points_field <- read.csv("./geodata/shp/Lepidocolaptes_lacrymiger_allpoints.csv")
 str(points_field)
## 'data.frame':    3438 obs. of  3 variables:
##  $ lon            : num  -76.2 -76.2 -74.3 -74.3 -76.1 ...
##  $ lat            : num  3.98 3.93 4.61 4.61 4.75 ...
##  $ scientific_name: chr  "Lepidocolaptes_lacrymiger" "Lepidocolaptes_lacrymiger" "Lepidocolaptes_lacrymiger" "Lepidocolaptes_lacrymiger" ...

Morover you also download aditional points data from the https://www.gbif.org/ .

# gbif_points = gbif('Lepidocolaptes' , 'lacrymiger' , download=T , geo=T , ext=c(-82,-59,-21,14) , removeZeros=TRUE )
# save(gbif_points , file="~/SE_data/exercise/geodata/SDM/gbif_points.Rdata")
load("./geodata/SDM/gbif_points.Rdata")
points=rbind.data.frame(
  data.frame(lat=gbif_points$lat,lon=gbif_points$lon),
  data.frame(lat=points_field$lat,lon=points_field$lon)
)
str(points)
## 'data.frame':    34835 obs. of  2 variables:
##  $ lat: num  0.02978 5.3731 4.76848 0.00634 6.40736 ...
##  $ lon: num  -78.7 -75.9 -75.5 -78.7 -75.7 ...

Load the environmental data layers

rCld   <- raster("./geodata/cloud/SA_meanannual.tif")
# compute min and max
rCld   =  setMinMax(rCld)
rCldIA <- raster("./geodata/cloud/SA_intra.tif")
rCldIA =  setMinMax(rCldIA)
rElv   <- raster("./geodata/dem/SA_elevation_mn_GMTED2010_mn_msk.tif")
rElv   =  setMinMax(rElv)
rVeg   <- raster("./geodata/vegetation/SA_tree_mn_percentage_GFC2013.tif")
rVeg   =  setMinMax(rVeg)
rElv
## class      : RasterLayer 
## dimensions : 8400, 5880, 49392000  (nrow, ncol, ncell)
## resolution : 0.008333333, 0.008333333  (x, y)
## extent     : -83, -34, -56, 14  (xmin, xmax, ymin, ymax)
## crs        : +proj=longlat +datum=WGS84 +no_defs 
## source     : SA_elevation_mn_GMTED2010_mn_msk.tif 
## names      : SA_elevation_mn_GMTED2010_mn_msk 
## values     : -400, 6736  (min, max)

Load expert range map

birdrange <- readOGR("./geodata/shp", "cartodb-query")
## OGR data source with driver: ESRI Shapefile 
## Source: "/Users/erinstearns/Documents/git_repos/SE_data/exercise/geodata/shp", layer: "cartodb-query"
## with 2 features
## It has 7 fields
plot(rElv)
points(points$lon, points$lat, col = "red", cex = .3)
plot(birdrange,add=TRUE)

# indicate that these data are presences
presence <- matrix(1,nrow(points),1)
points <- cbind(points,presence)
head(points)
##         lat       lon presence
## 1  0.029785 -78.68224        1
## 2  5.373100 -75.89100        1
## 3  4.768477 -75.45283        1
## 4  0.006336 -78.67635        1
## 5  6.407356 -75.66417        1
## 6 11.107720 -74.04844        1
# building spatial dataframe
coordinates(points)=c('lon','lat')
str(points)
## Formal class 'SpatialPointsDataFrame' [package "sp"] with 5 slots
##   ..@ data       :'data.frame':  34835 obs. of  1 variable:
##   .. ..$ presence: num [1:34835] 1 1 1 1 1 1 1 1 1 1 ...
##   ..@ coords.nrs : int [1:2] 2 1
##   ..@ coords     : num [1:34835, 1:2] -78.7 -75.9 -75.5 -78.7 -75.7 ...
##   .. ..- attr(*, "dimnames")=List of 2
##   .. .. ..$ : NULL
##   .. .. ..$ : chr [1:2] "lon" "lat"
##   ..@ bbox       : num [1:2, 1:2] -81.1 -18.8 -62.6 11.2
##   .. ..- attr(*, "dimnames")=List of 2
##   .. .. ..$ : chr [1:2] "lon" "lat"
##   .. .. ..$ : chr [1:2] "min" "max"
##   ..@ proj4string:Formal class 'CRS' [package "sp"] with 1 slot
##   .. .. ..@ projargs: chr NA
# assign projection
projection(points) <- "+proj=longlat +datum=WGS84" 

Loading eBird sampling dataset, in order to obtain “absence” data

# link to global sampling raster
# first crop with the gdal and then load the cropversion
system("gdal_translate -projwin -82 14 -59 -21 -co COMPRESS=DEFLATE -co ZLEVEL=9 ./geodata/SDM/eBirdSampling_filtered.tif ./geodata/SDM/eBirdSampling_filtered_crop.tif")
## Warning in system("gdal_translate -projwin -82 14 -59 -21 -co COMPRESS=DEFLATE
## -co ZLEVEL=9 ./geodata/SDM/eBirdSampling_filtered.tif ./geodata/SDM/
## eBirdSampling_filtered_crop.tif"): error in running command
gsampling <- raster("./geodata/SDM/eBirdSampling_filtered_crop.tif")
# assign projection
projection(gsampling)="+proj=longlat +datum=WGS84"
gsampling
## class      : RasterLayer 
## dimensions : 4200, 2760, 11592000  (nrow, ncol, ncell)
## resolution : 0.008333333, 0.008333333  (x, y)
## extent     : -82, -59, -20.99999, 14  (xmin, xmax, ymin, ymax)
## crs        : +proj=longlat +datum=WGS84 +no_defs 
## source     : eBirdSampling_filtered_crop.tif 
## names      : eBirdSampling_filtered_crop
# convert to points within data region
samplingp <- as(gsampling,"SpatialPointsDataFrame")
samplingp <- samplingp[samplingp$eBirdSampling_filtered_crop>0,]
str(samplingp)
## Formal class 'SpatialPointsDataFrame' [package "sp"] with 5 slots
##   ..@ data       :'data.frame':  17420 obs. of  1 variable:
##   .. ..$ eBirdSampling_filtered_crop: num [1:17420] 1 2 1 2 1 1 1 4 1 2 ...
##   ..@ coords.nrs : num(0) 
##   ..@ coords     : num [1:17420, 1:2] -61 -61 -61 -61 -61 ...
##   .. ..- attr(*, "dimnames")=List of 2
##   .. .. ..$ : NULL
##   .. .. ..$ : chr [1:2] "x" "y"
##   ..@ bbox       : num [1:2, 1:2] -82 -21 -59 14
##   .. ..- attr(*, "dimnames")=List of 2
##   .. .. ..$ : chr [1:2] "x" "y"
##   .. .. ..$ : chr [1:2] "min" "max"
##   ..@ proj4string:Formal class 'CRS' [package "sp"] with 1 slot
##   .. .. ..@ projargs: chr "+proj=longlat +datum=WGS84 +no_defs"
##   .. .. ..$ comment: chr "GEOGCRS[\"unknown\",\n    DATUM[\"World Geodetic System 1984\",\n        ELLIPSOID[\"WGS 84\",6378137,298.25722"| __truncated__
head(samplingp)
##      eBirdSampling_filtered_crop
## 2518                           1
## 2519                           2
## 2520                           1
## 2523                           2
## 2525                           1
## 2529                           1
 # edit column names
 colnames(samplingp@data) <- c("observation")
 samplingp$presence=0
 plot(samplingp, col="green",pch=21,cex=.5)#absences
 plot(points, col="red",add=TRUE)#presences
 plot(birdrange, col="cyan",add=TRUE)#species range

summary(samplingp)
## Object of class SpatialPointsDataFrame
## Coordinates:
##         min       max
## x -81.97917 -59.00417
## y -20.99583  13.99584
## Is projected: FALSE 
## proj4string : [+proj=longlat +datum=WGS84 +no_defs]
## Number of points: 17420
## Data attributes:
##   observation          presence
##  Min.   :   1.000   Min.   :0  
##  1st Qu.:   1.000   1st Qu.:0  
##  Median :   2.000   Median :0  
##  Mean   :   5.935   Mean   :0  
##  3rd Qu.:   3.000   3rd Qu.:0  
##  Max.   :1412.000   Max.   :0

combine presence and non-presence point datasets

pdata <- rbind(points[,"presence"],samplingp[,"presence"])
pdata@data[,c("lon","lat")] <- coordinates(pdata)
table(pdata$presence)
## 
##     0     1 
## 17420 34835

Plot the environmental data layers

env <- stack(c(rCld,rCldIA,rElv,rVeg))
env
## class      : RasterStack 
## dimensions : 8400, 5880, 49392000, 4  (nrow, ncol, ncell, nlayers)
## resolution : 0.008333333, 0.008333333  (x, y)
## extent     : -83, -34, -56, 14  (xmin, xmax, ymin, ymax)
## crs        : +proj=longlat +datum=WGS84 +no_defs 
## names      : SA_meanannual, SA_intra, SA_elevation_mn_GMTED2010_mn_msk, SA_tree_mn_percentage_GFC2013 
## min values :           329,        0,                             -400,                             0 
## max values :         10000,     3815,                             6736,                         10000
# rename layers for convenience
vars <- c("cld","cld_ia","elev","forest")

names(env) <- vars
 
# visual result 
options(repr.plot.width=15, repr.plot.height=9)
 # check out the plot
plot(env)

Scaling and centering the environmental variables to zero mean and variance of 1

senv <- scale(env[[vars]])
senv
## class      : RasterBrick 
## dimensions : 8400, 5880, 49392000, 4  (nrow, ncol, ncell, nlayers)
## resolution : 0.008333333, 0.008333333  (x, y)
## extent     : -83, -34, -56, 14  (xmin, xmax, ymin, ymax)
## crs        : +proj=longlat +datum=WGS84 +no_defs 
## source     : r_tmp_2022-12-12_175839_9108_30290.grd 
## names      :        cld,     cld_ia,       elev,     forest 
## min values : -3.9349032, -1.6168665, -1.0685917, -0.5696309 
## max values :   2.800472,   4.348538,   6.673642,   2.178319
# this operation is quite long. Would be possible to do in gdal? how?
hist(env)
## Warning in .hist1(raster(x, y[i]), maxpixels = maxpixels, main = main[y[i]], :
## 0% of the raster cells were used. 100000 values used.

## Warning in .hist1(raster(x, y[i]), maxpixels = maxpixels, main = main[y[i]], :
## 0% of the raster cells were used. 100000 values used.

## Warning in .hist1(raster(x, y[i]), maxpixels = maxpixels, main = main[y[i]], :
## 0% of the raster cells were used. 100000 values used.

## Warning in .hist1(raster(x, y[i]), maxpixels = maxpixels, main = main[y[i]], :
## 0% of the raster cells were used. 100000 values used.

hist(senv)
## Warning in .hist1(raster(x, y[i]), maxpixels = maxpixels, main = main[y[i]], :
## 0% of the raster cells were used. 100000 values used.

## Warning in .hist1(raster(x, y[i]), maxpixels = maxpixels, main = main[y[i]], :
## 0% of the raster cells were used. 100000 values used.

## Warning in .hist1(raster(x, y[i]), maxpixels = maxpixels, main = main[y[i]], :
## 0% of the raster cells were used. 100000 values used.

## Warning in .hist1(raster(x, y[i]), maxpixels = maxpixels, main = main[y[i]], :
## 0% of the raster cells were used. 100000 values used.

Annotate the point records with the scaled environmental data

df.xact <- raster::extract(senv,pdata,sp=T) 
df.xact <- (df.xact[! is.na(df.xact$forest),])

Correlation plots

## convert to 'long' format for easier plotting
df.xactl <- reshape::melt(df.xact@data,id.vars=c("lat","lon","presence"),variable.name="variable")
head(df.xactl)
##         lat       lon presence variable    value
## 1  0.029785 -78.68224        1      cld 2.551142
## 2  5.373100 -75.89100        1      cld 2.587358
## 3  4.768477 -75.45283        1      cld 2.646556
## 4  0.006336 -78.67635        1      cld 2.616609
## 5  6.407356 -75.66417        1      cld 1.810119
## 6 11.107720 -74.04844        1      cld 2.322707
tail(df.xactl)
##              lat       lon presence variable      value
## 209015 -20.97083 -70.13750        0   forest -0.5696309
## 209016 -20.97083 -68.56250        0   forest -0.5696309
## 209017 -20.97916 -70.15417        0   forest -0.5696309
## 209018 -20.98749 -68.55417        0   forest -0.5696309
## 209019 -20.99583 -70.13750        0   forest -0.5696309
## 209020 -20.99583 -67.42917        0   forest -0.5696309
ggplot(df.xactl,aes(x=value,y=presence))+facet_wrap(~variable)+
  geom_point()+
  stat_smooth(method = "lm", formula = y ~ x + I(x^2), col="red")+
  geom_smooth(method="gam",formula=y ~ s(x, bs = "cs")) + 
  theme(text = element_text(size = 20))  
## Warning: Removed 404 rows containing non-finite values (`stat_smooth()`).
## Removed 404 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 404 rows containing missing values (`geom_point()`).

Model Fitting

Cross validation

df.xact <- as.data.frame(df.xact)
df.xact$grp <- kfold(df.xact,2)
head(df.xact)
##   presence       lon       lat      cld     cld_ia     elev     forest
## 1        1 -78.68224  0.029785 2.551142 -1.2900593 1.081788  1.6430179
## 2        1 -75.89100  5.373100 2.587358 -1.4260987 1.266230  1.8469158
## 3        1 -75.45283  4.768477 2.646556 -1.3119507 3.890734 -0.2396022
## 4        1 -78.67635  0.006336 2.616609 -1.2947503 1.364961  1.7699732
## 5        1 -75.66417  6.407356 1.810119 -0.7943755 2.127684  0.5647226
## 6        1 -74.04844 11.107720 2.322707 -0.9710704 1.992064  0.5636234
##       lon.1     lat.1 grp
## 1 -78.68224  0.029785   1
## 2 -75.89100  5.373100   2
## 3 -75.45283  4.768477   2
## 4 -78.67635  0.006336   2
## 5 -75.66417  6.407356   1
## 6 -74.04844 11.107720   1
mdl.glm <- glm(presence~cld+cld_ia*I(cld_ia^2)+elev*I(elev^2)+forest, family=binomial(link=logit), data=subset(df.xact,grp==1))
summary(mdl.glm)
## 
## Call:
## glm(formula = presence ~ cld + cld_ia * I(cld_ia^2) + elev * 
##     I(elev^2) + forest, family = binomial(link = logit), data = subset(df.xact, 
##     grp == 1))
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.9714  -0.0263   0.2261   0.3539   4.9487  
## 
## Coefficients:
##                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        -5.97053    0.14431 -41.373  < 2e-16 ***
## cld                 1.21530    0.08013  15.167  < 2e-16 ***
## cld_ia              0.22476    0.09130   2.462   0.0138 *  
## I(cld_ia^2)         0.34291    0.03835   8.941  < 2e-16 ***
## elev                4.99094    0.19347  25.797  < 2e-16 ***
## I(elev^2)          -1.87208    0.12204 -15.339  < 2e-16 ***
## forest              1.02948    0.03866  26.630  < 2e-16 ***
## cld_ia:I(cld_ia^2) -0.16774    0.03372  -4.974 6.55e-07 ***
## elev:I(elev^2)      0.16445    0.02280   7.212 5.53e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 32879  on 25922  degrees of freedom
## Residual deviance: 11136  on 25914  degrees of freedom
##   (205 observations deleted due to missingness)
## AIC: 11154
## 
## Number of Fisher Scoring iterations: 8

Prediction

Calculate estimates of p(occurrence) for each cell. We can use the predict function in the raster package to make the predictions across the full raster grid and save the output.

pred.glm1 <- predict(mdl.glm,df.xact[which(df.xact$grp==1),vars],type="response")
pred.glm2 <- predict(mdl.glm,df.xact[which(df.xact$grp==2),vars],type="response")
plotROC(df.xact[which(df.xact$grp==1),"presence"],pred.glm1)

plotROC(df.xact[which(df.xact$grp==2),"presence"],pred.glm2)

Out mapping

p1 <- raster::predict(senv,mdl.glm,type="response")

Plot the results as a map:

options(repr.plot.width=8, repr.plot.height=9)
gplot(p1)+geom_tile(aes(fill=value))+
  scale_fill_gradientn(
    colours=c("blue","green","yellow","orange","red"),
    na.value = "transparent")+
  geom_polygon(aes(x=long,y=lat,group=group),
               data=fortify(birdrange),fill="transparent",col="darkred")+
  geom_point(aes(x = lon, y = lat), data = subset(df.xact,presence==1),col="black",size=0.5)+
  coord_equal()
## Regions defined for each Polygons

Cross Validation

The library caret allow an easy implementation of the Cross Validation for several models.

ctrl <- trainControl(method = "cv", number = 10)

mdl.glm.cv <- train(  as.factor(presence) ~cld+cld_ia*I(cld_ia^2)+elev*I(elev^2)+forest,
                      family=binomial(link=logit),  data = df.xact, method = "glm", trControl = ctrl,
                      metric='Accuracy', na.action=na.exclude)
mdl.glm.cv
## Generalized Linear Model 
## 
## 52255 samples
##     4 predictor
##     2 classes: '0', '1' 
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 46665, 46667, 46667, 46666, 46667, 46665, ... 
## Resampling results:
## 
##   Accuracy   Kappa    
##   0.9236658  0.8212671

Predict in terms of presence/absence

pred.mdl.glm.cv.raw <- raster::predict(senv,mdl.glm.cv ,type="raw")
plot(pred.mdl.glm.cv.raw)

Predict and plotting in terms of probability

pred.mdl.glm.cv.prob1 <- raster::predict(senv,mdl.glm.cv ,type="prob" , index=1)
plot(pred.mdl.glm.cv.prob1)

pred.mdl.glm.cv.prob2 <- raster::predict(senv,mdl.glm.cv ,type="prob" , index=2)
plot(pred.mdl.glm.cv.prob2)

References

Random Forest model

Random Forest is an ensemble learning method for classification, regression and other tasks that operate by constructing a multitude of decision trees at training time and outputting the class that is the mode of the classes (classification) or mean/average prediction (regression) of the individual trees (source https://en.wikipedia.org/wiki/Random_forest)

The randomForest packages (https://cran.r-project.org/web/packages/randomForest/index.html) allows to run reggression or classification case in R. (check also ranger: A Fast Implementation of Random Forests https://cran.r-project.org/web/packages/ranger/index.html )

mdl.rf <- randomForest(as.factor(presence) ~ cld + cld_ia + elev +forest, data=subset(df.xact,grp==1),
                       na.action=na.exclude)
mdl.rf
## 
## Call:
##  randomForest(formula = as.factor(presence) ~ cld + cld_ia + elev +      forest, data = subset(df.xact, grp == 1), na.action = na.exclude) 
##                Type of random forest: classification
##                      Number of trees: 500
## No. of variables tried at each split: 2
## 
##         OOB estimate of  error rate: 6.38%
## Confusion matrix:
##      0     1 class.error
## 0 7436  1118  0.13069909
## 1  537 16832  0.03091715

Predict as presence / absence

(This may take a while to run)

pred.rf.resp  <- raster::predict(senv , mdl.rf , type='response')
plot(pred.rf.resp)

Predict as probability of presence / absence

(This may take a while to run)

pred.rf.prob1 <- raster::predict(senv , mdl.rf , type='prob' , index=1)   
pred.rf.prob0 <- raster::predict(senv , mdl.rf , type='prob' , index=2)
plot(pred.rf.prob1)

plot(pred.rf.prob0)

Predict occurence presence

Create a raste with 10 km resoultion (0.083333333333)

occ = raster(nrows=420, ncols=276, xmn=-82, xmx=-59, ymn=-21, ymx=14, crs="+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0" , vals=0)
occ
## class      : RasterLayer 
## dimensions : 420, 276, 115920  (nrow, ncol, ncell)
## resolution : 0.08333333, 0.08333333  (x, y)
## extent     : -82, -59, -21, 14  (xmin, xmax, ymin, ymax)
## crs        : +proj=longlat +datum=WGS84 +no_defs 
## source     : memory
## names      : layer 
## values     : 0, 0  (min, max)
pointcount = function(r, pts){
  # make a raster of zeroes like the input
  r2 = r
  r2[] = 0
  # get the cell index for each point and make a table:
  counts = table(cellFromXY(r,pts))
  # fill in the raster with the counts from the cell index:
  r2[as.numeric(names(counts))] = counts
  return(r2)
 }

occ_raster <- pointcount(occ, points)
plot(occ_raster)

occ_point = rasterToPoints(occ_raster , spatial=TRUE)
str(occ_point)
## Formal class 'SpatialPointsDataFrame' [package "sp"] with 5 slots
##   ..@ data       :'data.frame':  115920 obs. of  1 variable:
##   .. ..$ layer: num [1:115920] 0 0 0 0 0 0 0 0 0 0 ...
##   ..@ coords.nrs : num(0) 
##   ..@ coords     : num [1:115920, 1:2] -82 -81.9 -81.8 -81.7 -81.6 ...
##   .. ..- attr(*, "dimnames")=List of 2
##   .. .. ..$ : NULL
##   .. .. ..$ : chr [1:2] "x" "y"
##   ..@ bbox       : num [1:2, 1:2] -82 -21 -59 14
##   .. ..- attr(*, "dimnames")=List of 2
##   .. .. ..$ : chr [1:2] "x" "y"
##   .. .. ..$ : chr [1:2] "min" "max"
##   ..@ proj4string:Formal class 'CRS' [package "sp"] with 1 slot
##   .. .. ..@ projargs: chr "+proj=longlat +datum=WGS84 +no_defs"
##   .. .. ..$ comment: chr "GEOGCRS[\"unknown\",\n    DATUM[\"World Geodetic System 1984\",\n        ELLIPSOID[\"WGS 84\",6378137,298.25722"| __truncated__

Sampling the occurrence points by eliminating the extremes.

occ_point_sel = subset(occ_point , (layer > 0 & layer < 100)  )
hist(occ_point_sel@data$layer)

occ_point_sel_predictor <- raster::extract(senv,occ_point_sel,sp=T) 
str(occ_point_sel_predictor)
## Formal class 'SpatialPointsDataFrame' [package "sp"] with 5 slots
##   ..@ data       :'data.frame':  1098 obs. of  5 variables:
##   .. ..$ layer : num [1:1098] 1 3 1 80 12 1 1 1 1 1 ...
##   .. ..$ cld   : num [1:1098] -0.06682 -0.00901 0.26539 1.9306 1.72097 ...
##   .. ..$ cld_ia: num [1:1098] 2.652 2.53 -0.0313 0.1172 0.3408 ...
##   .. ..$ elev  : num [1:1098] -0.497 -0.517 -0.498 0.195 0.689 ...
##   .. ..$ forest: num [1:1098] -0.229 1.413 -0.293 1.963 1.639 ...
##   ..@ coords.nrs : num(0) 
##   ..@ coords     : num [1:1098, 1:2] -74.2 -74.1 -72.4 -74.1 -74 ...
##   .. ..- attr(*, "dimnames")=List of 2
##   .. .. ..$ : NULL
##   .. .. ..$ : chr [1:2] "x" "y"
##   ..@ bbox       : num [1:2, 1:2] -81.1 -18.8 -62.6 11.2
##   .. ..- attr(*, "dimnames")=List of 2
##   .. .. ..$ : chr [1:2] "x" "y"
##   .. .. ..$ : chr [1:2] "min" "max"
##   ..@ proj4string:Formal class 'CRS' [package "sp"] with 1 slot
##   .. .. ..@ projargs: chr "+proj=longlat +datum=WGS84 +no_defs"
##   .. .. ..$ comment: chr "GEOGCRS[\"unknown\",\n    DATUM[\"World Geodetic System 1984\",\n        ELLIPSOID[\"WGS 84\",6378137,298.25722"| __truncated__
mdl.rf.occ <- randomForest( layer ~ cld + cld_ia + elev + forest , data=occ_point_sel_predictor )
mdl.rf.occ
## 
## Call:
##  randomForest(formula = layer ~ cld + cld_ia + elev + forest,      data = occ_point_sel_predictor) 
##                Type of random forest: regression
##                      Number of trees: 500
## No. of variables tried at each split: 1
## 
##           Mean of squared residuals: 211.8068
##                     % Var explained: 0.78
pred.rf.occ <- raster::predict(senv , mdl.rf.occ , type='response')
plot(pred.rf.occ)